library(vars)
library(lmtest) # линейные регрессии
library("ggplot2") # графики
library("dplyr") # манипуляции с таблицами
library(glmnet) # линейные регрессии
library(tidyverse)# обработка данных
library(car)# для vif
totaldannye <-read.csv("totaldannye.csv", header = TRUE)
view(totaldannye)
summary(totaldannye)
## region time plan_value_hps plan_value_tps
## Length:118848 Min. : 0.00 Min. : 0 Min. : 307.7
## Class :character 1st Qu.: 5.75 1st Qu.: 0 1st Qu.: 2388.8
## Mode :character Median :11.50 Median : 21 Median : 3041.8
## Mean :11.50 Mean :1660 Mean : 4046.8
## 3rd Qu.:17.25 3rd Qu.:1682 3rd Qu.: 4720.3
## Max. :23.00 Max. :8056 Max. :12596.5
## tech_min_hps tech_min_tps technology_min_hps technology_min_tps
## Min. :-1320.0 Min. : 214.5 Min. : 0.0 Min. : 272.5
## 1st Qu.: -104.8 1st Qu.:1360.0 1st Qu.: 0.0 1st Qu.:2016.1
## Median : 0.0 Median :1953.3 Median : 0.0 Median :2592.9
## Mean : -276.2 Mean :2675.2 Mean : 495.0 Mean :3276.5
## 3rd Qu.: 1.0 3rd Qu.:3246.3 3rd Qu.: 576.8 3rd Qu.:4255.9
## Max. : 220.4 Max. :7814.6 Max. :2835.2 Max. :9861.6
## tech_max_hps tech_max_tps plan_value plan_exp
## Min. : 0.0 Min. : 442.5 Min. : 2129 Min. : 33.77
## 1st Qu.: 52.5 1st Qu.: 2762.7 1st Qu.: 3238 1st Qu.: 401.58
## Median : 650.0 Median : 3404.2 Median : 4667 Median : 647.62
## Mean :2332.2 Mean : 4717.2 Mean : 6622 Mean : 789.77
## 3rd Qu.:2500.4 3rd Qu.: 5800.7 3rd Qu.: 9051 3rd Qu.:1071.03
## Max. :9045.0 Max. :13579.7 Max. :19062 Max. :2536.07
## plan_imp price_demand price_supply ov_plan_value
## Min. : 17.36 Min. : 0 Min. : 0 Min. : 2294
## 1st Qu.: 968.31 1st Qu.:1046 1st Qu.:1012 1st Qu.: 3928
## Median :1533.54 Median :1378 Median :1340 Median : 5192
## Mean :2332.96 Mean :1358 Mean :1321 Mean : 7096
## 3rd Qu.:3220.07 3rd Qu.:1652 3rd Qu.:1625 3rd Qu.: 9270
## Max. :8165.45 Max. :3169 Max. :2923 Max. :19546
## date infl price_demand_real price_supply_real
## Length:118848 Min. :1.000 Min. : 0.0 Min. : 0
## Class :character 1st Qu.:1.070 1st Qu.: 897.3 1st Qu.: 865
## Mode :character Median :1.221 Median :1154.8 Median :1128
## Mean :1.186 Mean :1146.0 Mean :1115
## 3rd Qu.:1.271 3rd Qu.:1396.6 3rd Qu.:1375
## Max. :1.410 Max. :2685.9 Max. :2568
## real_usd_rub oil_rub_real coal_rub_real al_rub_real
## Min. :0.5879 Min. : 2710 Min. : 10836 Min. :1601
## 1st Qu.:0.8600 1st Qu.: 3765 1st Qu.: 17273 1st Qu.:1787
## Median :0.9039 Median : 4517 Median : 20727 Median :1987
## Mean :0.8998 Mean : 4566 Mean : 53996 Mean :2106
## 3rd Qu.:0.9708 3rd Qu.: 5101 3rd Qu.: 37538 3rd Qu.:2377
## Max. :1.4238 Max. :10268 Max. :257904 Max. :3404
## steel_rub_real weather_t weather_u weather_t3
## Min. : 32419 Min. :-25.750 Min. : 6.69 Min. :-25.770
## 1st Qu.: 54296 1st Qu.: -4.200 1st Qu.: 34.02 1st Qu.: -4.020
## Median : 69137 Median : 1.195 Median : 40.39 Median : 1.401
## Mean : 75136 Mean : 1.700 Mean : 46.14 Mean : 2.022
## 3rd Qu.: 97563 3rd Qu.: 7.450 3rd Qu.: 48.11 3rd Qu.: 7.805
## Max. :135128 Max. : 31.740 Max. :100.00 Max. : 33.070
## weather_u3 weather_t6 weather_u6 weather_t12
## Min. : 5.322 Min. :-25.290 Min. : 5.53 Min. :-24.790
## 1st Qu.: 32.528 1st Qu.: -4.028 1st Qu.: 32.66 1st Qu.: -4.004
## Median : 40.110 Median : 1.420 Median : 39.97 Median : 1.484
## Mean : 45.043 Mean : 2.023 Mean : 45.04 Mean : 2.023
## 3rd Qu.: 47.680 3rd Qu.: 7.820 3rd Qu.: 47.46 3rd Qu.: 7.898
## Max. :100.000 Max. : 32.650 Max. :100.00 Max. : 31.740
## weather_u12 weather_t24 weather_u24 day_of_the_week
## Min. : 6.69 Min. :-23.690 Min. : 8.109 Length:118848
## 1st Qu.: 32.83 1st Qu.: -4.013 1st Qu.: 33.120 Class :character
## Median : 39.60 Median : 1.533 Median : 39.150 Mode :character
## Mean : 45.04 Mean : 2.021 Mean : 45.057
## 3rd Qu.: 47.06 3rd Qu.: 8.007 3rd Qu.: 46.776
## Max. :100.00 Max. : 28.200 Max. :100.000
## holiday month year
## Min. :0 Length:118848 Min. :2020
## 1st Qu.:0 Class :character 1st Qu.:2021
## Median :0 Mode :character Median :2022
## Mean :0 Mean :2022
## 3rd Qu.:0 3rd Qu.:2023
## Max. :0 Max. :2024
totaldannye$time <- as.character(totaldannye$time)
totaldannye$day_of_the_week = substr(totaldannye$day_of_the_week, 1, 1)
totaldannye$day_of_the_week <- as.character(totaldannye$day_of_the_week)
totaldannye <- subset(totaldannye,select=-price_demand)
totaldannye <- subset(totaldannye,select=-price_supply)
totaldannye <- subset(totaldannye,select=-plan_value)
totaldannye <- subset(totaldannye,select=-month)
totaldannye <- subset(totaldannye,select=-year)
#totaldannye <- subset(totaldannye,select=-infl)
totaldannye <- subset(totaldannye,select=-technology_min_hps)
totaldannye <- subset(totaldannye,select=-technology_min_tps)
totaldannye <- subset(totaldannye,select=-weather_t3)
totaldannye <- subset(totaldannye,select=-weather_t6)
totaldannye <- subset(totaldannye,select=-weather_t12)
totaldannye <- subset(totaldannye,select=-weather_t24)
totaldannye <- subset(totaldannye,select=-weather_u3)
totaldannye <- subset(totaldannye,select=-weather_u6)
totaldannye <- subset(totaldannye,select=-weather_u12)
totaldannye <- subset(totaldannye,select=-weather_u24)
# Modified function to replace outliers with neighbor averages
smooth_outliers <- function(region_data, region_name) {
# Select only numeric columns
numeric_cols <- sapply(region_data, is.numeric)
numeric_data <- region_data[, numeric_cols]
# Create directory for plots if it doesn't exist
if (!dir.exists("boxplots")) {
dir.create("boxplots")
}
# Create boxplots for each numeric variable and save to file
pdf(file = paste0("boxplots/", region_name, "_boxplots.pdf"), width = 11, height = 8)
par(mfrow = c(3, 3))
for (col in names(numeric_data)) {
boxplot(numeric_data[[col]], main = paste(region_name, col))
}
dev.off()
# Replace outliers with neighbor averages (3-point moving average)
smoothed_data <- region_data
for (col in names(numeric_data)) {
if (length(unique(numeric_data[[col]])) > 1) { # Skip if constant
# Identify outliers
lower_bound <- quantile(numeric_data[[col]], 0.005, na.rm = TRUE)
upper_bound <- quantile(numeric_data[[col]], 0.995, na.rm = TRUE)
outliers <- which(numeric_data[[col]] < lower_bound | numeric_data[[col]] > upper_bound)
# Replace outliers with moving average
for (i in outliers) {
start_idx <- max(1, i-1)
end_idx <- min(nrow(numeric_data), i+1)
neighbor_values <- numeric_data[[col]][start_idx:end_idx]
valid_neighbors <- neighbor_values[!neighbor_values %in% numeric_data[[col]][outliers]]
if (length(valid_neighbors) > 0) {
smoothed_data[[col]][i] <- mean(valid_neighbors, na.rm = TRUE)
} else {
smoothed_data[[col]][i] <- NA # If no valid neighbors, set to NA
}
}
}
}
# Remove any remaining NAs
smoothed_data <- na.omit(smoothed_data)
cat(paste("Smoothed outliers in", region_name, ":", length(outliers), "points\n"))
return(smoothed_data)
}
# Apply to each region (replacing the previous clean_region_data calls)
Chelyabinsk_Oblast <- smooth_outliers(
subset(totaldannye, region == "Челябинская область"),
"Chelyabinsk"
)
## Smoothed outliers in Chelyabinsk : 289 points
Irkutsk_Oblast <- smooth_outliers(
subset(totaldannye, region == "Иркутская область"),
"Irkutsk"
)
## Smoothed outliers in Irkutsk : 272 points
Republic_of_Tatarstan <- smooth_outliers(
subset(totaldannye, region == "Республика Татарстан"),
"Tatarstan"
)
## Smoothed outliers in Tatarstan : 298 points
Moscow_Oblast <- smooth_outliers(
subset(totaldannye, region == "Московская область"),
"Moscow"
)
## Smoothed outliers in Moscow : 290 points
core_vars <- Chelyabinsk_Oblast[, c("price_demand_real", "infl", "coal_rub_real", "price_supply_real", "steel_rub_real")]
# Fit VAR model with different lags
var_model <- VAR(core_vars, p = 35, type = "const") # Test lags 1 to 35
plot(var_model)
# Extract AIC/BIC
lag_selection <- VARselect(core_vars, lag.max = 35, type = "const")
print(lag_selection$selection) # Shows optimal lags by AIC, BIC, etc.
## AIC(n) HQ(n) SC(n) FPE(n)
## 29 28 27 29
M1_D <- lm(data = Chelyabinsk_Oblast, price_demand_real ~ time + plan_value_hps + plan_value_tps + tech_min_hps + tech_min_tps + tech_max_hps + tech_max_tps + plan_exp + plan_imp + ov_plan_value + date + infl + price_supply_real + real_usd_rub + oil_rub_real + coal_rub_real + al_rub_real + steel_rub_real + weather_t + weather_u +day_of_the_week + holiday)
for (i in 1:35) {
Chelyabinsk_Oblast[[paste0("price_demand_real_lag", i)]] <- lag(Chelyabinsk_Oblast$price_demand_real, i)
}
# Remove rows with NA values (created by lagging)
Chelyabinsk_Oblast_clean <- na.omit(Chelyabinsk_Oblast)
# Instead of using all planning variables, create:
Chelyabinsk_Oblast_clean$net_plan_effect <- with(Chelyabinsk_Oblast_clean, ov_plan_value - plan_value_tps + plan_exp - plan_imp)
# Replace individual tech variables with:
Chelyabinsk_Oblast_clean$tech_avg_tps <- with(Chelyabinsk_Oblast_clean, tech_min_tps/2 + tech_max_tps/2)
Chelyabinsk_Oblast_clean$tech_range_tps <- with(Chelyabinsk_Oblast_clean, tech_max_tps - tech_min_tps)
# Replace individual tech variables with:
Chelyabinsk_Oblast_clean$tech_avg_hps <- with(Chelyabinsk_Oblast_clean, tech_min_hps/2 + tech_max_hps/2)
Chelyabinsk_Oblast_clean$tech_range_hps <- with(Chelyabinsk_Oblast_clean, tech_max_hps - tech_min_hps)
lag_terms_Chelyabinsk_Oblast <- paste0("price_demand_real_lag", 1:35, collapse = " + ")
base_formula_Chelyabinsk_Oblast <- "price_demand_real ~ time + net_plan_effect +tech_avg_tps + infl + real_usd_rub + oil_rub_real + coal_rub_real + al_rub_real + steel_rub_real + weather_t + weather_u + day_of_the_week + price_demand_real_lag1 + price_demand_real_lag24"
# Fit the model
M1_D_lagged <- lm(formula = base_formula_Chelyabinsk_Oblast, data = Chelyabinsk_Oblast_clean)
summary(M1_D_lagged)
##
## Call:
## lm(formula = base_formula_Chelyabinsk_Oblast, data = Chelyabinsk_Oblast_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -474.64 -18.49 0.27 18.39 476.07
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.621e+01 8.552e+00 3.065 0.002179 **
## time1 -4.873e+00 1.708e+00 -2.853 0.004336 **
## time10 7.040e+01 1.906e+00 36.938 < 2e-16 ***
## time11 7.258e+01 1.906e+00 38.088 < 2e-16 ***
## time12 6.493e+01 1.905e+00 34.083 < 2e-16 ***
## time13 7.339e+01 1.901e+00 38.602 < 2e-16 ***
## time14 7.613e+01 1.905e+00 39.975 < 2e-16 ***
## time15 7.401e+01 1.912e+00 38.710 < 2e-16 ***
## time16 7.638e+01 1.915e+00 39.885 < 2e-16 ***
## time17 6.378e+01 1.917e+00 33.269 < 2e-16 ***
## time18 5.706e+01 1.909e+00 29.887 < 2e-16 ***
## time19 5.430e+01 1.898e+00 28.613 < 2e-16 ***
## time2 1.419e+01 1.729e+00 8.203 2.45e-16 ***
## time20 2.474e+01 1.887e+00 13.112 < 2e-16 ***
## time21 7.607e+00 1.846e+00 4.122 3.77e-05 ***
## time22 -4.165e+01 1.791e+00 -23.262 < 2e-16 ***
## time23 -5.404e+01 1.711e+00 -31.575 < 2e-16 ***
## time3 3.353e+01 1.737e+00 19.301 < 2e-16 ***
## time4 6.612e+01 1.734e+00 38.119 < 2e-16 ***
## time5 1.066e+02 1.721e+00 61.911 < 2e-16 ***
## time6 1.697e+02 1.726e+00 98.284 < 2e-16 ***
## time7 1.569e+02 1.756e+00 89.313 < 2e-16 ***
## time8 1.394e+02 1.820e+00 76.620 < 2e-16 ***
## time9 9.868e+01 1.883e+00 52.414 < 2e-16 ***
## net_plan_effect 3.464e-02 8.969e-03 3.862 0.000113 ***
## tech_avg_tps -5.195e-03 1.088e-03 -4.774 1.81e-06 ***
## infl 1.289e+01 4.199e+00 3.069 0.002149 **
## real_usd_rub -5.970e-01 5.830e+00 -0.102 0.918437
## oil_rub_real -1.722e-04 5.767e-04 -0.299 0.765226
## coal_rub_real -2.702e-05 8.047e-06 -3.358 0.000785 ***
## al_rub_real 2.455e-03 1.520e-03 1.615 0.106412
## steel_rub_real 3.465e-05 1.730e-05 2.003 0.045168 *
## weather_t -8.009e-02 6.335e-02 -1.264 0.206131
## weather_u 1.604e-01 3.609e-02 4.445 8.82e-06 ***
## day_of_the_week2 -2.241e+00 9.249e-01 -2.422 0.015423 *
## day_of_the_week3 -1.450e+00 9.261e-01 -1.566 0.117378
## day_of_the_week4 4.996e-02 9.260e-01 0.054 0.956974
## day_of_the_week5 3.391e+00 9.280e-01 3.654 0.000259 ***
## day_of_the_week6 1.571e+00 9.315e-01 1.686 0.091733 .
## day_of_the_week7 -5.999e+00 9.471e-01 -6.334 2.43e-10 ***
## price_demand_real_lag1 8.758e-01 2.855e-03 306.750 < 2e-16 ***
## price_demand_real_lag24 4.715e-02 2.386e-03 19.761 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 40.56 on 27060 degrees of freedom
## Multiple R-squared: 0.9615, Adjusted R-squared: 0.9614
## F-statistic: 1.648e+04 on 41 and 27060 DF, p-value: < 2.2e-16
vif_results <- vif(M1_D_lagged)
print(vif_results)
## GVIF Df GVIF^(1/(2*Df))
## time 5.674458 23 1.038460
## net_plan_effect 1.396172 1 1.181597
## tech_avg_tps 2.793571 1 1.671398
## infl 2.389713 1 1.545870
## real_usd_rub 7.984252 1 2.825642
## oil_rub_real 4.930564 1 2.220487
## coal_rub_real 4.749254 1 2.179278
## al_rub_real 5.294014 1 2.300872
## steel_rub_real 3.976642 1 1.994152
## weather_t 2.959787 1 1.720403
## weather_u 1.500402 1 1.224909
## day_of_the_week 1.095188 6 1.007606
## price_demand_real_lag1 5.728980 1 2.393529
## price_demand_real_lag24 4.001760 1 2.000440
core_vars <- Irkutsk_Oblast[, c("price_demand_real", "infl", "coal_rub_real", "price_supply_real", "steel_rub_real")]
# Fit VAR model with different lags
var_model <- VAR(core_vars, p = 35, type = "const") # Test lags 1 to 35
plot(var_model)
# Extract AIC/BIC
lag_selection <- VARselect(core_vars, lag.max = 35, type = "const")
print(lag_selection$selection) # Shows optimal lags by AIC, BIC, etc.
## AIC(n) HQ(n) SC(n) FPE(n)
## 29 27 2 29
for (i in 1:35) {Irkutsk_Oblast [[paste0("price_demand_real_lag", i)]] <- lag(Irkutsk_Oblast$price_demand_real, i)}
# Remove rows with NA values (created by lagging)
Irkutsk_Oblast_clean <- na.omit(Irkutsk_Oblast)
# Instead of using all planning variables, create:
Irkutsk_Oblast_clean$net_plan_effect <- with(Irkutsk_Oblast_clean, ov_plan_value - plan_value_tps + plan_exp - plan_imp)
# Replace individual tech variables with:
Irkutsk_Oblast_clean$tech_avg_tps <- with(Irkutsk_Oblast_clean, tech_min_tps/2 + tech_max_tps/2)
Irkutsk_Oblast_clean$tech_range_tps <- with(Irkutsk_Oblast_clean, tech_max_tps - tech_min_tps)
# Replace individual tech variables with:
Irkutsk_Oblast_clean$tech_avg_hps <- with(Irkutsk_Oblast_clean, tech_min_hps/2 + tech_max_hps/2)
Irkutsk_Oblast_clean$tech_range_hps <- with(Irkutsk_Oblast_clean, tech_max_hps - tech_min_hps)
Irkutsk_Oblast_clean$tech_avg <- with(Irkutsk_Oblast_clean, tech_avg_hps + tech_avg_tps)
lag_terms_Irkutsk_Oblast <- paste0("price_demand_real_lag", 1:35, collapse = " + ")
base_formula_Irkutsk_Oblast <- "price_demand_real ~ time + net_plan_effect + infl + tech_avg + real_usd_rub + coal_rub_real + al_rub_real + steel_rub_real + weather_t + weather_u + day_of_the_week+ price_demand_real_lag1 + price_demand_real_lag24 "
# Fit the model
M2_D_lagged <- lm(formula = base_formula_Irkutsk_Oblast, data = Irkutsk_Oblast_clean)
summary(M2_D_lagged)
##
## Call:
## lm(formula = base_formula_Irkutsk_Oblast, data = Irkutsk_Oblast_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -717.88 -12.03 -0.03 11.96 683.36
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.315e+01 7.469e+00 7.115 1.14e-12 ***
## time1 8.506e+00 1.871e+00 4.547 5.47e-06 ***
## time10 5.506e+00 1.920e+00 2.868 0.00414 **
## time11 1.303e+01 1.916e+00 6.799 1.07e-11 ***
## time12 1.870e+01 1.923e+00 9.727 < 2e-16 ***
## time13 2.668e+01 1.931e+00 13.821 < 2e-16 ***
## time14 1.996e+01 1.941e+00 10.285 < 2e-16 ***
## time15 8.057e+00 1.942e+00 4.148 3.36e-05 ***
## time16 8.383e+00 1.937e+00 4.327 1.52e-05 ***
## time17 4.534e+00 1.932e+00 2.346 0.01897 *
## time18 -1.712e+01 1.919e+00 -8.921 < 2e-16 ***
## time19 -1.470e+01 1.901e+00 -7.735 1.07e-14 ***
## time2 1.425e+01 1.866e+00 7.639 2.27e-14 ***
## time20 -5.802e+00 1.900e+00 -3.053 0.00226 **
## time21 -2.238e+00 1.906e+00 -1.174 0.24028
## time22 -3.359e+00 1.901e+00 -1.767 0.07730 .
## time23 -2.759e+00 1.872e+00 -1.474 0.14051
## time3 1.713e+01 1.876e+00 9.133 < 2e-16 ***
## time4 2.182e+01 1.890e+00 11.548 < 2e-16 ***
## time5 2.088e+01 1.908e+00 10.943 < 2e-16 ***
## time6 1.753e+01 1.921e+00 9.125 < 2e-16 ***
## time7 7.947e+00 1.928e+00 4.123 3.76e-05 ***
## time8 1.185e+01 1.930e+00 6.138 8.48e-10 ***
## time9 5.246e+00 1.929e+00 2.720 0.00653 **
## net_plan_effect -8.380e-03 4.976e-04 -16.839 < 2e-16 ***
## infl 4.874e+01 4.010e+00 12.156 < 2e-16 ***
## tech_avg 3.986e-04 1.033e-03 0.386 0.69951
## real_usd_rub -2.011e+01 4.198e+00 -4.791 1.67e-06 ***
## coal_rub_real -2.176e-05 8.058e-06 -2.700 0.00693 **
## al_rub_real -9.421e-04 1.210e-03 -0.779 0.43615
## steel_rub_real 9.308e-05 1.941e-05 4.795 1.63e-06 ***
## weather_t -6.858e-01 8.865e-02 -7.737 1.06e-14 ***
## weather_u -1.116e-01 4.685e-02 -2.381 0.01726 *
## day_of_the_week2 -3.304e+00 1.008e+00 -3.279 0.00104 **
## day_of_the_week3 -1.211e+00 1.011e+00 -1.199 0.23063
## day_of_the_week4 7.814e-01 1.006e+00 0.776 0.43749
## day_of_the_week5 -6.906e-01 1.011e+00 -0.683 0.49454
## day_of_the_week6 1.997e+00 1.019e+00 1.959 0.05016 .
## day_of_the_week7 -1.647e+00 1.016e+00 -1.622 0.10483
## price_demand_real_lag1 9.026e-01 2.667e-03 338.493 < 2e-16 ***
## price_demand_real_lag24 3.332e-02 2.589e-03 12.870 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 44.15 on 26851 degrees of freedom
## Multiple R-squared: 0.9341, Adjusted R-squared: 0.934
## F-statistic: 9510 on 40 and 26851 DF, p-value: < 2.2e-16
vif_results <- vif(M2_D_lagged)
print(vif_results)
## GVIF Df GVIF^(1/(2*Df))
## time 1.948501 23 1.014607
## net_plan_effect 1.864507 1 1.365469
## infl 3.365723 1 1.834591
## tech_avg 5.288970 1 2.299776
## real_usd_rub 3.578163 1 1.891603
## coal_rub_real 4.072753 1 2.018106
## al_rub_real 2.778751 1 1.666959
## steel_rub_real 4.066759 1 2.016621
## weather_t 5.196629 1 2.279612
## weather_u 1.460366 1 1.208456
## day_of_the_week 1.036831 6 1.003019
## price_demand_real_lag1 2.895717 1 1.701681
## price_demand_real_lag24 2.729342 1 1.652072
core_vars <- Republic_of_Tatarstan[, c("price_demand_real", "infl", "coal_rub_real", "price_supply_real", "steel_rub_real")]
# Fit VAR model with different lags
var_model <- VAR(core_vars, p = 35, type = "const") # Test lags 1 to 35
plot(var_model)
# Extract AIC/BIC
lag_selection <- VARselect(core_vars, lag.max = 35, type = "const")
print(lag_selection$selection) # Shows optimal lags by AIC, BIC, etc.
## AIC(n) HQ(n) SC(n) FPE(n)
## 31 28 27 31
for (i in 1:35) {Republic_of_Tatarstan [[paste0("price_demand_real_lag", i)]] <- lag(Republic_of_Tatarstan$price_demand_real, i)}
# Remove rows with NA values (created by lagging)
Republic_of_Tatarstan_clean <- na.omit(Republic_of_Tatarstan)
# Instead of using all planning variables, create:
Republic_of_Tatarstan_clean$net_plan_effect <- with(Republic_of_Tatarstan_clean, ov_plan_value - plan_value_tps + plan_exp - plan_imp)
# Replace individual tech variables with:
Republic_of_Tatarstan_clean$tech_avg <- with(Republic_of_Tatarstan_clean, tech_min_tps/2 + tech_max_tps/2)
Republic_of_Tatarstan_clean$tech_range <- with(Republic_of_Tatarstan_clean, tech_max_tps - tech_min_tps)
lag_terms_Republic_of_Tatarstan <- paste0("price_demand_real_lag", 1:35, collapse = " + ")
base_formula_Republic_of_Tatarstan <- "price_demand_real ~ time + net_plan_effect +tech_avg + infl + oil_rub_real + coal_rub_real + steel_rub_real + weather_t + day_of_the_week + price_demand_real_lag1 + price_demand_real_lag24"
# Fit the model
M3_D_lagged <- lm(formula = base_formula_Republic_of_Tatarstan, data = Republic_of_Tatarstan_clean)
summary(M3_D_lagged)
##
## Call:
## lm(formula = base_formula_Republic_of_Tatarstan, data = Republic_of_Tatarstan_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -498.41 -20.31 0.57 19.94 617.33
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.575e+01 6.682e+00 6.847 7.71e-12 ***
## time1 -1.870e+00 2.054e+00 -0.910 0.36264
## time10 1.204e+02 2.382e+00 50.541 < 2e-16 ***
## time11 1.140e+02 2.381e+00 47.875 < 2e-16 ***
## time12 1.118e+02 2.380e+00 46.973 < 2e-16 ***
## time13 1.175e+02 2.378e+00 49.425 < 2e-16 ***
## time14 1.124e+02 2.378e+00 47.264 < 2e-16 ***
## time15 1.069e+02 2.383e+00 44.854 < 2e-16 ***
## time16 1.106e+02 2.408e+00 45.934 < 2e-16 ***
## time17 1.052e+02 2.424e+00 43.372 < 2e-16 ***
## time18 1.050e+02 2.410e+00 43.585 < 2e-16 ***
## time19 1.074e+02 2.368e+00 45.363 < 2e-16 ***
## time2 2.141e+01 2.087e+00 10.258 < 2e-16 ***
## time20 9.057e+01 2.328e+00 38.899 < 2e-16 ***
## time21 6.762e+01 2.260e+00 29.920 < 2e-16 ***
## time22 -2.522e+01 2.188e+00 -11.531 < 2e-16 ***
## time23 -6.303e+01 2.068e+00 -30.477 < 2e-16 ***
## time3 4.839e+01 2.110e+00 22.935 < 2e-16 ***
## time4 8.485e+01 2.114e+00 40.134 < 2e-16 ***
## time5 1.326e+02 2.088e+00 63.527 < 2e-16 ***
## time6 2.021e+02 2.073e+00 97.509 < 2e-16 ***
## time7 2.164e+02 2.112e+00 102.472 < 2e-16 ***
## time8 1.990e+02 2.236e+00 88.998 < 2e-16 ***
## time9 1.541e+02 2.342e+00 65.821 < 2e-16 ***
## net_plan_effect -1.690e-02 2.635e-03 -6.413 1.45e-10 ***
## tech_avg 2.938e-03 9.348e-04 3.143 0.00168 **
## infl -2.009e+01 4.466e+00 -4.499 6.85e-06 ***
## oil_rub_real -2.930e-05 3.750e-04 -0.078 0.93772
## coal_rub_real -2.246e-05 5.656e-06 -3.970 7.19e-05 ***
## steel_rub_real 9.977e-05 1.827e-05 5.461 4.78e-08 ***
## weather_t 5.686e-01 6.222e-02 9.139 < 2e-16 ***
## day_of_the_week2 -2.479e+00 1.130e+00 -2.193 0.02831 *
## day_of_the_week3 -2.782e+00 1.129e+00 -2.463 0.01377 *
## day_of_the_week4 -6.588e-01 1.127e+00 -0.585 0.55874
## day_of_the_week5 3.390e+00 1.130e+00 3.001 0.00269 **
## day_of_the_week6 -2.658e+00 1.170e+00 -2.271 0.02315 *
## day_of_the_week7 -1.182e+01 1.184e+00 -9.982 < 2e-16 ***
## price_demand_real_lag1 8.551e-01 3.077e-03 277.917 < 2e-16 ***
## price_demand_real_lag24 4.534e-02 2.399e-03 18.898 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 48.93 on 26696 degrees of freedom
## Multiple R-squared: 0.9594, Adjusted R-squared: 0.9593
## F-statistic: 1.66e+04 on 38 and 26696 DF, p-value: < 2.2e-16
vif_results <- vif(M3_D_lagged)
print(vif_results)
## GVIF Df GVIF^(1/(2*Df))
## time 7.114545 23 1.043578
## net_plan_effect 2.154616 1 1.467861
## tech_avg 1.928068 1 1.388549
## infl 2.397434 1 1.548365
## oil_rub_real 1.417333 1 1.190518
## coal_rub_real 1.597643 1 1.263979
## steel_rub_real 2.990977 1 1.729444
## weather_t 1.788782 1 1.337453
## day_of_the_week 1.261249 6 1.019530
## price_demand_real_lag1 6.223720 1 2.494739
## price_demand_real_lag24 3.785636 1 1.945671
core_vars <- Moscow_Oblast[, c("price_demand_real", "infl", "coal_rub_real", "price_supply_real", "steel_rub_real")]
# Fit VAR model with different lags
var_model <- VAR(core_vars, p = 35, type = "const") # Test lags 1 to 35
plot(var_model)
# Extract AIC/BIC
lag_selection <- VARselect(core_vars, lag.max = 35, type = "const")
print(lag_selection$selection) # Shows optimal lags by AIC, BIC, etc.
## AIC(n) HQ(n) SC(n) FPE(n)
## 28 28 27 28
for (i in 1:35) {Moscow_Oblast [[paste0("price_demand_real_lag", i)]] <- lag(Moscow_Oblast$price_demand_real, i)}
# Remove rows with NA values (created by lagging)
Moscow_Oblast_clean <- na.omit(Moscow_Oblast)
# Instead of using all planning variables, create:
Moscow_Oblast_clean$net_plan_effect <- with(Moscow_Oblast_clean, ov_plan_value - plan_value_tps + plan_exp - plan_imp)
# Replace individual tech variables with:
Moscow_Oblast_clean$tech_avg <- with(Moscow_Oblast_clean, tech_min_tps/2 + tech_max_tps/2)
Moscow_Oblast_clean$tech_range <- with(Moscow_Oblast_clean, tech_max_tps - tech_min_tps)
lag_terms_Moscow_Oblast <- paste0("price_demand_real_lag", 1:35, collapse = " + ")
base_formula_Moscow_Oblast <- "price_demand_real ~ time + net_plan_effect +tech_avg + infl + coal_rub_real + al_rub_real + steel_rub_real + weather_t + day_of_the_week + price_demand_real_lag1 + price_demand_real_lag24"
# Fit the model
M4_D_lagged <- lm(formula = base_formula_Moscow_Oblast, data = Moscow_Oblast_clean)
summary(M4_D_lagged)
##
## Call:
## lm(formula = base_formula_Moscow_Oblast, data = Moscow_Oblast_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -769.26 -26.55 0.98 26.64 759.91
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.510e+01 8.158e+00 9.206 < 2e-16 ***
## time1 -1.149e+01 2.370e+00 -4.848 1.25e-06 ***
## time10 1.648e+02 2.721e+00 60.581 < 2e-16 ***
## time11 1.400e+02 2.711e+00 51.629 < 2e-16 ***
## time12 1.236e+02 2.712e+00 45.563 < 2e-16 ***
## time13 1.411e+02 2.703e+00 52.219 < 2e-16 ***
## time14 1.296e+02 2.705e+00 47.908 < 2e-16 ***
## time15 1.200e+02 2.716e+00 44.170 < 2e-16 ***
## time16 1.289e+02 2.727e+00 47.264 < 2e-16 ***
## time17 1.288e+02 2.768e+00 46.540 < 2e-16 ***
## time18 1.331e+02 2.779e+00 47.885 < 2e-16 ***
## time19 1.247e+02 2.716e+00 45.892 < 2e-16 ***
## time2 1.558e+01 2.412e+00 6.461 1.06e-10 ***
## time20 1.097e+02 2.670e+00 41.081 < 2e-16 ***
## time21 8.464e+01 2.634e+00 32.134 < 2e-16 ***
## time22 -3.870e+01 2.563e+00 -15.099 < 2e-16 ***
## time23 -8.879e+01 2.401e+00 -36.972 < 2e-16 ***
## time3 4.257e+01 2.450e+00 17.375 < 2e-16 ***
## time4 7.863e+01 2.463e+00 31.924 < 2e-16 ***
## time5 1.280e+02 2.432e+00 52.623 < 2e-16 ***
## time6 2.059e+02 2.403e+00 85.693 < 2e-16 ***
## time7 2.387e+02 2.380e+00 100.286 < 2e-16 ***
## time8 2.584e+02 2.439e+00 105.963 < 2e-16 ***
## time9 2.099e+02 2.623e+00 80.007 < 2e-16 ***
## net_plan_effect 6.925e-03 1.173e-03 5.902 3.64e-09 ***
## tech_avg 2.540e-03 5.241e-04 4.846 1.26e-06 ***
## infl -4.651e+01 4.167e+00 -11.163 < 2e-16 ***
## coal_rub_real -2.543e-05 7.036e-06 -3.615 0.000301 ***
## al_rub_real -5.456e-03 1.524e-03 -3.582 0.000342 ***
## steel_rub_real 1.191e-04 2.377e-05 5.010 5.48e-07 ***
## weather_t 4.489e-01 6.862e-02 6.543 6.15e-11 ***
## day_of_the_week2 -3.884e+00 1.307e+00 -2.971 0.002968 **
## day_of_the_week3 -3.246e+00 1.310e+00 -2.477 0.013258 *
## day_of_the_week4 -1.617e+00 1.304e+00 -1.241 0.214766
## day_of_the_week5 3.057e+00 1.311e+00 2.331 0.019741 *
## day_of_the_week6 -3.764e+00 1.321e+00 -2.849 0.004392 **
## day_of_the_week7 -1.454e+01 1.352e+00 -10.756 < 2e-16 ***
## price_demand_real_lag1 8.545e-01 3.047e-03 280.443 < 2e-16 ***
## price_demand_real_lag24 3.709e-02 2.177e-03 17.034 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 56.85 on 27086 degrees of freedom
## Multiple R-squared: 0.964, Adjusted R-squared: 0.964
## F-statistic: 1.91e+04 on 38 and 27086 DF, p-value: < 2.2e-16
vif_results <- vif(M4_D_lagged)
print(vif_results)
## GVIF Df GVIF^(1/(2*Df))
## time 8.436278 23 1.047451
## net_plan_effect 1.498458 1 1.224115
## tech_avg 4.194912 1 2.048149
## infl 2.300805 1 1.516840
## coal_rub_real 1.857909 1 1.363051
## al_rub_real 2.704869 1 1.644649
## steel_rub_real 3.792091 1 1.947329
## weather_t 4.231530 1 2.057068
## day_of_the_week 1.136027 6 1.010685
## price_demand_real_lag1 6.988660 1 2.643607
## price_demand_real_lag24 3.568952 1 1.889167
Focused ARMA Modeling (Lags 1, 12, 24)
residuals_M1_D <- residuals(M1_D_lagged)
residuals_M2_D <- residuals(M2_D_lagged)
residuals_M3_D <- residuals(M3_D_lagged)
residuals_M4_D <- residuals(M4_D_lagged)
plot(residuals_M1_D)
plot(residuals_M2_D)
plot(residuals_M3_D)
plot(residuals_M4_D)